-
Notifications
You must be signed in to change notification settings - Fork 13.3k
[LoongArch] Enable LoopTermFold Pass #130737
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-backend-loongarch Author: None (tangaac) Changes: Full diff: https://github.com/llvm/llvm-project/pull/130737.diff (5 files affected):
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
index 62b08be5435cd..53b884563ad88 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
@@ -146,7 +146,9 @@ namespace {
class LoongArchPassConfig : public TargetPassConfig {
public:
LoongArchPassConfig(LoongArchTargetMachine &TM, PassManagerBase &PM)
- : TargetPassConfig(TM, PM) {}
+ : TargetPassConfig(TM, PM) {
+ EnableLoopTermFold = true;
+ }
LoongArchTargetMachine &getLoongArchTargetMachine() const {
return getTM<LoongArchTargetMachine>();
diff --git a/llvm/test/CodeGen/LoongArch/opt-pipeline.ll b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll
index ab76d4e998d2b..c6c1c124c8314 100644
--- a/llvm/test/CodeGen/LoongArch/opt-pipeline.ll
+++ b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll
@@ -44,6 +44,7 @@
; LAXX-NEXT: Canonicalize Freeze Instructions in Loops
; LAXX-NEXT: Induction Variable Users
; LAXX-NEXT: Loop Strength Reduction
+; LAXX-NEXT: Loop Terminator Folding
; LAXX-NEXT: Basic Alias Analysis (stateless AA impl)
; LAXX-NEXT: Function Alias Analysis Results
; LAXX-NEXT: Merge contiguous icmps into a memcmp
diff --git a/llvm/test/CodeGen/LoongArch/preferred-alignments.ll b/llvm/test/CodeGen/LoongArch/preferred-alignments.ll
index c3618db646016..2e12d7ed9d13f 100644
--- a/llvm/test/CodeGen/LoongArch/preferred-alignments.ll
+++ b/llvm/test/CodeGen/LoongArch/preferred-alignments.ll
@@ -10,13 +10,13 @@ define signext i32 @sum(ptr noalias nocapture noundef readonly %0, i32 noundef s
; LA464-NEXT: # %bb.1:
; LA464-NEXT: move $a2, $zero
; LA464-NEXT: bstrpick.d $a1, $a1, 31, 0
+; LA464-NEXT: alsl.d $a1, $a1, $a0, 2
; LA464-NEXT: .p2align 4, , 16
; LA464-NEXT: .LBB0_2: # =>This Inner Loop Header: Depth=1
; LA464-NEXT: ld.w $a3, $a0, 0
-; LA464-NEXT: add.w $a2, $a3, $a2
-; LA464-NEXT: addi.d $a1, $a1, -1
; LA464-NEXT: addi.d $a0, $a0, 4
-; LA464-NEXT: bnez $a1, .LBB0_2
+; LA464-NEXT: add.w $a2, $a3, $a2
+; LA464-NEXT: bne $a0, $a1, .LBB0_2
; LA464-NEXT: # %bb.3:
; LA464-NEXT: move $a0, $a2
; LA464-NEXT: ret
diff --git a/llvm/test/Transforms/LoopStrengthReduce/LoongArch/lit.local.cfg b/llvm/test/Transforms/LoopStrengthReduce/LoongArch/lit.local.cfg
new file mode 100644
index 0000000000000..cc24278acbb41
--- /dev/null
+++ b/llvm/test/Transforms/LoopStrengthReduce/LoongArch/lit.local.cfg
@@ -0,0 +1,2 @@
+if not "LoongArch" in config.root.targets:
+ config.unsupported = True
diff --git a/llvm/test/Transforms/LoopStrengthReduce/LoongArch/term-fold-crash.ll b/llvm/test/Transforms/LoopStrengthReduce/LoongArch/term-fold-crash.ll
new file mode 100644
index 0000000000000..54fe262aca941
--- /dev/null
+++ b/llvm/test/Transforms/LoopStrengthReduce/LoongArch/term-fold-crash.ll
@@ -0,0 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=loop-reduce,loop-term-fold -mtriple=loongarch64 < %s | FileCheck %s
+
+define void @test(ptr %p, i8 %arg, i32 %start) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: ptr [[P:%.*]], i8 [[ARG:%.*]], i32 [[START:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[ARG]] to i32
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[CONV]], 1
+; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[START]], [[SHR]]
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 1
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[ADD810:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[ADD:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[IDXPROM2:%.*]] = zext i32 [[ADD810]] to i64
+; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr i8, ptr [[P]], i64 [[IDXPROM2]]
+; CHECK-NEXT: [[V:%.*]] = load i8, ptr [[ARRAYIDX3]], align 1
+; CHECK-NEXT: [[ADD]] = add i32 [[ADD810]], 1
+; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq i32 [[ADD]], [[TMP1]]
+; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label %[[EXIT:.*]], label %[[FOR_BODY]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ %conv = zext i8 %arg to i32
+ %shr = lshr i32 %conv, 1
+ %wide.trip.count = zext nneg i32 %shr to i64
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %add810 = phi i32 [ %start, %entry ], [ %add, %for.body ]
+ %idxprom2 = zext i32 %add810 to i64
+ %arrayidx3 = getelementptr i8, ptr %p, i64 %idxprom2
+ %v = load i8, ptr %arrayidx3, align 1
+ %add = add i32 %add810, 1
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %exitcond.not = icmp eq i64 %indvars.iv, %wide.trip.count
+ br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+ ret void
+}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What's the effect after enabling this pass?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It seems unnecessary to borrow this crash test from RISC-V.
This will remove one instruction from the loop body. For example, compiling the following with `clang temp.c -S -O1`:
void foo(int *__restrict a, short int * __restrict b, int n) {
for(int i = 0 ; i < n; i++ )
a[i] = b[i];
}
Before:
# %bb.0:
ori $a3, $zero, 1
blt $a2, $a3, .LBB0_2
.p2align 4, , 16
.LBB0_1: # =>This Inner Loop Header: Depth=1
ld.h $a3, $a1, 0
st.w $a3, $a0, 0
addi.d $a0, $a0, 4
addi.d $a2, $a2, -1
addi.d $a1, $a1, 2
bnez $a2, .LBB0_1
.LBB0_2:
ret
After:
# %bb.0: # %entry
ori $a3, $zero, 1
blt $a2, $a3, .LBB0_3
# %bb.1: # %for.body.preheader
alsl.d $a2, $a2, $a0, 2
.p2align 4, , 16
.LBB0_2: # %for.body
# =>This Inner Loop Header: Depth=1
ld.h $a3, $a1, 0
st.w $a3, $a0, 0
addi.d $a0, $a0, 4
addi.d $a1, $a1, 2
bne $a0, $a2, .LBB0_2
.LBB0_3: # %for.cond.cleanup
ret
|
I see. Could you pre-commit a dedicated IR test case in llvm/test/CodeGen/LoongArch? |
No description provided.